Study

Healthy participants do motor execution (ME) and motor imagery (MI) EEG neurofeedback (NF) on seperate days. On each day, they do four 60 trial blocks of NF training. For the ME task, on each trial, after a brief fixation period, an arrow appears indicating which hand must be moved during the subsequent five second NF period. During the NF period, participants receive real-time, contineous feedback indicating the extent to which the brain activity over their primary motor cortex is lateralized. The more their activity is lateralized, the greater the length of the horizontal bar presented on screen. Their goal is simply to maximize the length of the bar in the direction of the hand that they are moving on a particular trial.

The data of interest are the average power (amplitude squared) values of the ipsilateral and contralateral primary motor cortex electrodes (C3 and C4 according to the 10-20 international system) for each trial (240 total) and for each task (ME and NF). We are interested in whether any of these three within subject predictors - laterality (contralateral, ipsilateral), trial (1 to 240), and task (ME, MI) - affect brain activity. If the neurofeedback training works across the board, we expect to see an interaction between laterality and trial such that the difference between contralateral and ipsilateral signals, with contralateral having lower power than ipsilateral, increases with trial. A three-way interaction, such that task modulates the increase in lateralization with trial, would not be surprising however.

We’ve collected a single pilot participant. Therefore, we will examine the participant’s data and inspire ourselves from it to come up with plausible parameters with which to simulate data. The actual analyses will then be conducted on the simulated data, for which the population (vs. sample) parameters have been set. In this way, we will be able to determine the actual accuracy of the model, at least for one random sample.

Data Processing

Load Packages

library(MASS)
library(signal)
## 
## Attaching package: 'signal'
## The following objects are masked from 'package:stats':
## 
##     filter, poly
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, signal, stats
## lag():    dplyr, stats
## select(): dplyr, MASS
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.14.1, packaged: 2016-12-28 14:55:41 UTC, GitRev: 5fa1e80eb817)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(bspec)
## 
## Attaching package: 'bspec'
## The following object is masked from 'package:stats':
## 
##     acf
## The following object is masked from 'package:base':
## 
##     sample
library(corrplot)

Load and Tidy Data

We load and concatnate the data files. We set my data types and factor levels. We convert the data to long format.

setwd("/Users/ghislaindentremont/Documents/Experiments/Neurofeedback/kevin_pilot")

raw = map_df(
  .x = list.files(
    pattern = "p101_raw"
  )
  , .f = function(file) {
    read_csv(
      file = file
      , col_names = FALSE
    )
  }
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_integer(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X11 = col_integer(),
##   X12 = col_integer(),
##   X13 = col_integer(),
##   X14 = col_integer(),
##   X16 = col_integer(),
##   X17 = col_integer()
## )
## See spec(...) for full column specifications.
names(raw) = c('id', 'age', 'sex', 'hand', 'year', 'month', 'day', 'hour', 'minute', 'seconds', 'block' , 'block_cond', 'trial', 'cue_loc', 'iti', 'trial_stage', 'iteration', 'time', 'Ch1', 'Ch2', 'Ch3', 'Ch4', 'Ch5', 'Ch6', 'Ch7', 'Ch8', 'Ch9', 'Ch10', 'Ch11', 'Ch12', 'Ch13', 'Ch14')

raw$id = factor(raw$id)
raw$sex = factor(raw$sex, levels = c(1, 2), labels = c("male", "female"))
raw$hand = factor(raw$hand, levels = c(1, 2), labels = c("left", "right"))
raw$block = factor(raw$block, levels = c(1:5), labels = c("1", "2", "3", "4", "5"))
raw$block_cond = factor(raw$block_cond, levels = c(1, 2), labels = c("MI", "ME"))
raw$cue_loc = factor(raw$cue_loc, levels = c(1, 2), labels = c("left", "right"))
raw$trial_stage = factor(raw$trial_stage, levels = c(1:4), labels = c("iti", "fixation", "cue", "NF"))

raw_long = raw %>%
  gather(channel, signal, Ch1:Ch14, factor_key = TRUE)
Add adjusted time column

We confirm that the estimated times match the actual times to within 10 ms. We will be using the estimated trial times as they are consistent across trials.

raw_long = raw_long %>%
  group_by(block_cond, block, trial, trial_stage, channel) %>%
  mutate(
    time_adj = time - min(time)
    , time_est = seq(0, (1/128)*(length(id)-1), by = 1/128)
    )

range(raw_long$time_est - raw_long$time_adj)
## [1] -0.0875000  0.0984375
Filter data

We apply a 6th order butterworth filter to the experiment data to isolate signals in the frequency band of interest: 13 to 30 Hz. The filter is applied forward and backwards to minimize phase shifts. Doing this ‘sharp’ filtering instead of frequency spectrum analyses allows us to keep the time domain for plots of continuous even-related activity. It is also appears to be common practice in the literature. After filtering, we create a new column for the power signal (the squared signal).

bu2 = butter(6, c(13,30)/(128/2), type = 'pass')
filtered_zero = raw_long %>%
  group_by(block_cond, channel) %>%
  mutate(butter_filtered = signal::filtfilt(bu2, signal)) %>%
  mutate(filtered_signal = as.numeric(butter_filtered)) %>%
  mutate(power = filtered_signal^2)
Get rid of practice block

The practice blocks serve only to familiarize the participant with the design and create a natural buffer for the filter applied above.

filtered_zero = filtered_zero %>%
  dplyr::filter(block != "1")
Create laterality variable

To examine the outcome of interest (laterality), we create a new variable.

filtered_zero = filtered_zero %>%
  dplyr::filter(channel == "Ch8" | channel == "Ch12") %>%
  mutate(laterality = ifelse(
    (cue_loc == "right" & channel == "Ch8") | (cue_loc == "left" & channel == "Ch12")
      , "contra"
      , "ipsi"
    )
  ) 
Get baseline

The baselines are based on an average over both channels of interest (C3 and C4) during fixation period in each trial. We expect baseline values for both channels to be very similar.

baselines = filtered_zero %>%
  dplyr::filter(trial_stage == "fixation") %>%
  group_by(block_cond, block, trial) %>%
  summarize(baseline = mean(power))
Get relative power

We get an estimate of relative power for each time point, for each trial, for each laterality. We define relative power as power over the baseline for the trial of interest.

relative_power = filtered_zero %>%
  dplyr::filter(trial_stage != "iti") %>%
  group_by(block_cond, block, trial, laterality) %>%
  mutate(time_est_ERD = seq(0, (1/128)*(length(id)-1), by = 1/128)) %>%
  left_join(baselines) %>%
  mutate(relative_power = (power/baseline))
## Joining, by = c("block", "block_cond", "trial")
Get average relative power, for each NF trial

In getting the average relative power for each trial, we only average between the one and four second marks of the NF stage of each trial, which spans five seconds total. This selection allows us to get estimates of average relative power that are not dependent on the ‘warm-up’ or ‘cool-down’ that the participant may manifest in each trial. We take the natural logarithm of average relative power for each trial. The specific reason for this is established in the following section.

mean_relative_power_by_NF_trial = relative_power %>%
  dplyr::filter(trial_stage == "NF", time_est < 4, time_est > 1) %>%
  group_by(block_cond, block, trial, laterality) %>% 
  summarize(mean_relative_power = mean(relative_power)) %>%
  mutate(log_mean_relative_power = log(mean_relative_power))

Establishing Generative Model for Data

For the purpose of creating a statistical model for the data we must infer an appropriate generative model for the data. Relative power is bounded by zero and positive infinity. Therefore, we first examine the likelihood of the data coming from a lognormal distribution.

Lognormal

Plotting raw data

First, we plot the distribution of trial-wise relative power values, for each laterality (they should be approximately the same), and each block condition. There does appear to be some difference in the shape of the distributions between the block conditions, but no noticeable difference between distributions for contralateral and ipsilateral conditions. The log-transformed data may very well come from a normal population.

mean_relative_power_by_NF_trial %>%
  ggplot()+
    geom_histogram(aes(x=mean_relative_power, y=..density..), bins = 50)+
    facet_grid(laterality~block_cond)+
    ggtitle("Non-transformed Relative Power")+
    xlab("Relative Power (Power/Baseline)")+
    ylab("Density")
## Warning: Removed 2 rows containing non-finite values (stat_bin).

mean_relative_power_by_NF_trial %>%
  ggplot(aes(x=log_mean_relative_power))+
    geom_histogram(aes(y=..density..), bins = 50)+
    facet_grid(laterality~block_cond)+
    ggtitle("Log-transformed Relative Power")+
    xlab("Log Relative Power Log(Power/Baseline)")+
    ylab("Density")
## Warning: Removed 2 rows containing non-finite values (stat_bin).

mean_relative_power_by_NF_trial %>%
  ggplot(aes(x=log_mean_relative_power))+
    geom_histogram(aes(y=..density..), bins = 50)+
    facet_grid(.~block_cond)+
    ggtitle("Combined Data: Log-transformed Relative Power")+
    xlab("Log Relative Power Log(Power/Baseline)")+
    ylab("Density")
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Simulating samples from normal distribution

The simulations below show that it is very likely that the transformed data come from normal distribution. However, it is clear that the model should account for an effect of block condition on both the mean and standard deviation of the distribution of log relative power values.

y = replicate(50, {
  rnorm(
    240
    , mean(mean_relative_power_by_NF_trial$log_mean_relative_power,na.rm=T)
    , sd(mean_relative_power_by_NF_trial$log_mean_relative_power,na.rm=T)
  )
  })

y_tibb = as_tibble(y) %>% 
  gather(simulation, value, 1:ncol(y))

mean_relative_power_by_NF_trial %>%
  ggplot()+
    geom_density(aes(x=log_mean_relative_power, y=..density.., color=block_cond), size=1)+
    geom_density(data=y_tibb, aes(x=value, y=..density.., group=simulation), size=0.1, position="identity")+
    ggtitle("MI AND ME")+
    xlab("Log Relative Power Log(Power/Baseline)")+
    ylab("Density")
## Warning: Removed 2 rows containing non-finite values (stat_density).

mean_sum = mean_relative_power_by_NF_trial %>%
  group_by(block_cond) %>%
  summarize(
    mean = mean(log_mean_relative_power, na.rm = T)
    , sd = sd(log_mean_relative_power, na.rm = T)
    )

y_MI = replicate(50, {
  rnorm(
    240
    , mean_sum$mean[mean_sum$block_cond == "MI"]
    , mean_sum$sd[mean_sum$block_cond == "MI"]
  )
  })

y_MI_tibb = as_tibble(y_MI) %>%
  gather(simulation, value, 1:ncol(y_MI))

mean_relative_power_by_NF_trial %>%
  dplyr::filter(block_cond == "MI")%>%
  ggplot()+
    geom_density(aes(x=log_mean_relative_power, y=..density..), color="red", size=1)+
    geom_density(data=y_MI_tibb, aes(x=value, y=..density.., group=simulation), size=0.1, position="identity")+
    ggtitle("MI")+
    xlab("Log Relative Power Log(Power/Baseline)")+
    ylab("Density")
## Warning: Removed 2 rows containing non-finite values (stat_density).

y_ME = replicate(50, {
  rnorm(
    240
    , mean_sum$mean[mean_sum$block_cond == "ME"]
    , mean_sum$sd[mean_sum$block_cond == "ME"]
  )
  })

y_ME_tibb = as_tibble(y_ME) %>%
  gather(simulation, value, 1:ncol(y_ME))

mean_relative_power_by_NF_trial %>%
  dplyr::filter(block_cond == "ME")%>%
  ggplot()+
    geom_density(aes(x=log_mean_relative_power, y=..density..), color="red", size=1)+
    geom_density(data=y_ME_tibb, aes(x=value, y=..density.., group=simulation), size=0.1, position="identity")+
    ggtitle("ME")+
    xlab("Log Relative Power Log(Power/Baseline)")+
    ylab("Density")

Average ERD timecourse

We generate am average ERD timecourse for the pilot participant. This is done by (1) averaging over trials, (2) smoothing (sma), and (3) taking the log of the average relative power. We only visualize the first 30 trials of each block because of a timing issue present in the other 30 trials in each block. The graphs display event-related desynchronization (reduction in relative power) in the ME condition, but not in the MI condition.

relative_power %>%
  dplyr::filter(trial <= 30) %>%
  group_by(block_cond, time_est_ERD, laterality) %>%
  summarize(avg_relative_power = mean(relative_power, na.rm = T)) %>%
  group_by(block_cond, laterality) %>%
  mutate(sma_relative_power = stats::filter(avg_relative_power, rep(1/128, 128), sides=2)) %>%
  mutate(log_sma_relative_power = log(sma_relative_power)) %>%
  mutate(log_avg_relative_power = log(avg_relative_power)) %>%
  ggplot()+
    geom_line(aes(x=time_est_ERD, y=log_avg_relative_power, color = laterality), alpha = 0.2)+
    geom_line(aes(x=time_est_ERD, y=log_sma_relative_power, color = laterality))+
    facet_grid(.~block_cond)+
    xlab("Trial Time (s)")+
    ylab("Log of Average Relative Power")+
    ggtitle("First 30 Trials") +
    geom_vline(xintercept = 2, linetype = "dashed")+
    geom_vline(xintercept = 2 + 1.25, linetype = "dashed")+
    geom_hline(yintercept=0, linetype = "dashed")+
    ylim(c(-1, 1))  # no avoid outlying noise at the end
## Warning: Removed 254 rows containing missing values (geom_path).

Create Data

We take the one pilot participant to be representative of the entire population. We inspire ourselves from this single participant’s data to select population parameters from which to sample data for our simulation. This is clear exercise in inductive reasoning. However, we don’t care so much about the accuracy of our population parameters. We care about dealing with population intercepts and effects that are largely plausible in both variability and magnitude.

Pilot Condition Values

Mean log relative power

The table of log relative power, averaged across trials, shows a near 0.2 effect of task (ME - MI) on log relative power.

mean_relative_power_by_NF_trial %>%
  group_by(block_cond, laterality) %>%
  summarize(meanz = mean(log_mean_relative_power, na.rm = T))
## Source: local data frame [4 x 3]
## Groups: block_cond [?]
## 
##   block_cond laterality       meanz
##       <fctr>      <chr>       <dbl>
## 1         MI     contra -0.08095410
## 2         MI       ipsi -0.09627813
## 3         ME     contra -0.29069231
## 4         ME       ipsi -0.31222481

The plot of log relative power as a function of trial (averaging over task and laterality conditions) indicates a zero slope for trial. The plot also makes it easy to visualize the spread of data points (noise) and the overall intercept which is nearing 0.2.

mean_relative_power_by_NF_trial = mean_relative_power_by_NF_trial %>%
  mutate(total_trial = (as.numeric(block) - 2)*60 + trial)

mean_relative_power_by_NF_trial %>%
  group_by(total_trial) %>%
  summarize(regress = mean(log_mean_relative_power)) %>%
  ggplot(aes(x=total_trial, y=regress))+
    geom_smooth()+
    geom_point()+
    geom_hline(yintercept=0, linetype = "dashed")+
    ylab("log of relative power")+
    xlab("total trial")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

The final plot offers a nice visual of the effect task and an indication that the pilot participant expresses no interaction effects among the conditions.

mean_relative_power_by_NF_trial$grouping = c(rep(c("MI contra", "MI ipsi"),240),  rep(c("ME contra", "ME ipsi"),240))

mean_relative_power_by_NF_trial %>%
  ggplot(aes(x=total_trial, y=log_mean_relative_power,  color=grouping))+
    geom_smooth()+
    geom_hline(yintercept=0, linetype = "dashed")+
    ylab("log of relative power")+
    xlab("total trial")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

Trial-by-trial noise

We also want to see if the amount of noise is variable among sub-groupings for this participant. At the very least, I’ll want to assume that noise is variable among participants.

There appears to be more variablity in the ME condition compared to the MI condition. However, no such discrepency in noise seems to exists between laterality conditions. Noise appears pretty consistent trough trials.

sd(mean_relative_power_by_NF_trial$log_mean_relative_power, na.rm = T)
## [1] 0.4198894
mean_relative_power_by_NF_trial %>%
  group_by(block_cond, laterality) %>%
  summarize(SDs = sd(log_mean_relative_power, na.rm = T))
## Source: local data frame [4 x 3]
## Groups: block_cond [?]
## 
##   block_cond laterality       SDs
##       <fctr>      <chr>     <dbl>
## 1         MI     contra 0.3443355
## 2         MI       ipsi 0.3252838
## 3         ME     contra 0.4667637
## 4         ME       ipsi 0.4678820
mean_relative_power_by_NF_trial %>%
  ggplot(aes(x=total_trial, y=log_mean_relative_power))+
    geom_point()+
    facet_grid(block_cond ~ laterality)+
    geom_hline(yintercept=0, linetype = "dashed")+
    ylab("log of mean relative power")
## Warning: Removed 2 rows containing missing values (geom_point).

Generate data

We have three within subject factors: (1) task, (2) trial, (3) laterality. The generative model is structured as follows. The log relative power for a given participant for a given trial is sampled from a normal distribution with a mean ‘power’, and a standard deviation ‘noise’. ‘Power’ is specified by the full model: power = B0 + B1(task) + B2(age) + B3(trial) + B4(task x trial) + B5(task x laterality) + B6(trial x laterality) + B7(task x laterality x trial), where the beta coefficients are sampled from a multivariate normal distribution, with specified mean and standard deviation for each coefficient, and a covariance matrix to account for the correlation among these coefficients. ‘Noise’, which varies among participants and conditions, is sampled from a lognormal distribution with a mean specified by the full model, and a set standard deviation. In theory, it would be possible to introduce correlations among the Beta coefficients of the ‘Noise’ and ‘Power’ outcomes. However, for simplicity, we don’t do this.

Basically, data are sampled from a generative model in which the overall average log relative power and the effect of any particular condition or trial on log relative power are allowed to vary among participants. The variability of the log relative power in a particular condition (what we’ve call ‘noise’) is itself allowed to vary among participants, and among conditions.

‘Power’ coefficients

Main effects, two-way interactions, and a three-way interaction are specficied. Their effect sizes are either 0.5, 1, and 5. Many of these effects were not seen in the pilot participant. Instead these effects that are broadly what we hypothesize finding with a full dataset, with some effects (like the three-way interaction) being a little extreme in it’s variabilty (cohen’s d of 5). The estimates of noise, however, were mainly inspired by the pilot data.

set.seed(1)
N = 40 #number of subjects

### population parameters
# overall mean
intercept_mean = -0.2
intercept_sd = 0.2

## main effects
# MI - ME
task_effect_mean = 0.2
task_effect_sd = 0.2
# trial slope
trial_effect_mean = -0.1/240
trial_effect_sd = 0.2/240
# ipsi - contra
laterality_effect_mean = 0.1
laterality_effect_sd = 0.2

## 2-way interactions 
# LI increases with trial
trial_laterality_interaction_mean = 0.2/240
trial_laterality_interaction_sd = 0.2/240
# task does not effect the slope of trial 
trial_task_interaction_mean = 0/240
trial_task_interaction_sd = 0.1/240
# task does not effect overall LI
task_laterality_interaction_mean = 0
task_laterality_interaction_sd = 0.1

## 3-way interaction
# ME makes the increase of LI with trial greater than does MI
trial_task_laterality_interaction_mean = -0.5/240
trial_task_laterality_interaction_sd = 0.1/240
  
# indicates that the more ERD for a given participants, the greater the difference between tasks
intercept_task_effect_correlation = -0.5
# indicates that the more ERD for a given participants, the more ERD as a function of trial
intercept_trial_effect_correlation = 0.3
# indicates that the more ERD for a given participants, the greater LI overall
intercept_laterality_effect_correlation = -0.5

#covariance matrix
v = matrix(
    data = c(
        intercept_sd^2
        , intercept_sd*task_effect_sd*intercept_task_effect_correlation
        , intercept_sd*trial_effect_sd*intercept_trial_effect_correlation
        , intercept_sd*laterality_effect_sd*intercept_laterality_effect_correlation
        , 0
        , 0
        , 0
        , 0

        , intercept_sd*task_effect_sd*intercept_task_effect_correlation
        , task_effect_sd^2
        , 0
        , 0
        , 0
        , 0
        , 0
        , 0

        , intercept_sd*trial_effect_sd*intercept_trial_effect_correlation
        , 0
        , trial_effect_sd^2
        , 0
        , 0
        , 0
        , 0
        , 0
        
        , intercept_sd*laterality_effect_sd*intercept_laterality_effect_correlation
        , 0
        , 0
        , laterality_effect_sd^2
        , 0
        , 0
        , 0
        , 0

        , 0
        , 0
        , 0
        , 0
        , trial_laterality_interaction_sd^2
        , 0
        , 0
        , 0
        
        , 0
        , 0
        , 0
        , 0
        , 0
        , trial_task_interaction_sd^2
        , 0
        , 0
    
        , 0
        , 0
        , 0
        , 0
        , 0
        , 0
        , task_laterality_interaction_sd^2
        , 0
        
        , 0
        , 0
        , 0
        , 0
        , 0
        , 0
        , 0
        , trial_task_laterality_interaction_sd^2
        
    )
    , nrow = 8
    , ncol = 8
)
Sample subject ‘power’ coefficients

We sample subject coefficients from multivariate normal distribution.

subjCoefs = MASS::mvrnorm(N
                          ,c(
                            intercept_mean
                            
                            ,task_effect_mean
                            , trial_effect_mean
                            , laterality_effect_mean
                            
                            , trial_laterality_interaction_mean
                            , trial_task_interaction_mean
                            , task_laterality_interaction_mean
                            
                            , trial_task_laterality_interaction_mean
                            )
                          ,v
                          )
Examine sample correlations

We plot sample correlations to establish the extent to which the random population draw was representative (i.e. no substantial outliers; e.g. 0.4-0.5).

C = cor(subjCoefs)
corrplot(C, main = "sample correlations")

View data and contrasts

We view the dataframe containing the simulated data along with the condition contrasts.

dat = expand.grid(
    subj = 1:N
    # intercept
    , I = 1
    # task
    , e1 = c(-.5,.5)
    # trial
    , e2 = 1:240 # -120  # center trial so that setting this effect to zero means average
    # laterality
    , e3 = c(-.5,.5)
)

nrow(dat) == 240*2*2*N
## [1] TRUE
# trial_laterality_interaction_mean
dat$in1 = dat$e2 * dat$e3
# trial_task_interaction_mean
dat$in2 = dat$e2 * dat$e1
# task_laterality_interaction_mean
dat$in3 = dat$e1 * dat$e3

# trial_task_laterality_interaction_mean
dat$in4 = dat$e1 * dat$e2 * dat$e3

dat$obs = rowSums( subjCoefs[dat$subj,] * as.matrix(dat[,2:ncol(dat)],ncol=ncol(dat)) )
head(dat[dat$subj==1,])
##     subj I   e1 e2   e3  in1  in2   in3   in4        obs
## 1      1 1 -0.5  1 -0.5 -0.5 -0.5  0.25  0.25 -0.5492530
## 41     1 1  0.5  1 -0.5 -0.5  0.5 -0.25 -0.25 -0.1870011
## 81     1 1 -0.5  2 -0.5 -1.0 -1.0  0.25  0.50 -0.5510015
## 121    1 1  0.5  2 -0.5 -1.0  1.0 -0.25 -0.50 -0.1876894
## 161    1 1 -0.5  3 -0.5 -1.5 -1.5  0.25  0.75 -0.5527500
## 201    1 1  0.5  3 -0.5 -1.5  1.5 -0.25 -0.75 -0.1883777
‘Noise’ coefficients

We set the population parameters for noise. We plot the (lognormal) distribution of participant noise values for each task condition, for which we’ve set an effect. These are mainly inspired by the pilot data.

noise_mean = 0.4
noise_sd = 0.2
noise_task_effect = 0.3

# overall mean noise
curve(dlnorm(x, log(noise_mean)-noise_sd/2, noise_sd), ylab = "density", xlab = "population noise", main = "overall mean")

# in ME task
curve(dlnorm(x, log(noise_mean)-noise_sd/2 + noise_task_effect/2, noise_sd), ylab = "density", xlab = "population noise", main = "+ task effect")

# in MI task
curve(dlnorm(x, log(noise_mean)-noise_sd/2 - noise_task_effect/2, noise_sd), ylab = "density", xlab = "population noise", main = "- task effect")

Sample subject ‘noise’ coefficients

We sample the noise coefficients from a lognormal distribution with the set population parameter values. We then plot the sampled participant-wise noise values.

noise_subj = dat %>%
  group_by(subj, e1) %>%
  summarize(noise_subj = rlnorm(1, (log(noise_mean) - noise_sd/2) - e1*noise_task_effect, noise_sd))

hist(noise_subj$noise_subj, xlab = "sample noise", main = "")

Sample each observation with noise

We complete the data creation process by sampling log relative power values with noise for each trial, for each participant.

dat = dat %>%
  left_join(noise_subj)
## Joining, by = c("subj", "e1")
dat$log_relative_power = rnorm(nrow(dat), dat$obs, dat$noise_subj)
View sample

We generate plots to visualize the sample at the participant level. The sample data look largely similar to the pilot data in their central tendency and disersion. Of course, we’ve enforced certain effects (especially the three-way interaction) that were not present in the pilot data and therefore those are noticeably different.

names(dat)[3:5] = c("task", "trial", "laterality")
dat$task = factor(dat$task, levels = c(-.5, .5), labels = c("ME", "MI"))
dat$trial = dat$trial 
dat$laterality = factor(dat$laterality, levels = c(-.5, .5), labels = c("contra", "ipsi"))

hist(dat$log_relative_power, main = "", xlab = "log relative power")

dat %>% 
  dplyr::filter(subj <= 4) %>%
  ggplot()+
    geom_point(aes(x=trial, y=log_relative_power, color = laterality), size = 0.5, alpha=0.4)+
    geom_line(aes(x=trial, y=obs, color = laterality), size=1)+
    geom_hline(yintercept=0, linetype = "dashed")+
    facet_grid(subj~task)+
    xlab("total trial")+
    ylab("log relative power")

dat = dat[,c("subj", "task", "trial", "laterality", "log_relative_power")]
dat = dat[order(dat$subj,dat$task,dat$trial,dat$laterality),]

# readr::write_csv(dat,path='dat.csv')